hw7

Author

Ryan Klein

Homework 7

Import Packages

import altair as alt
import pandas as pd
from altair import datum
# Turn off Altair's row-count limit so the full datasets below can be charted.
alt.data_transformers.disable_max_rows()
DataTransformerRegistry.enable('default')

Part 1

# Pump price of gasoline (US$ per liter): one row per country, one column
# per year.
gas_gap_data_url = (
    "https://calvin-data304.netlify.app/data/pump_price_for_gasoline_us_per_liter.csv"
)
gas_gap_data = pd.read_csv(filepath_or_buffer=gas_gap_data_url)

# Preview the first five rows.
gas_gap_data.head(n=5)
country 1991 1992 1993 1994 1995 1996 1997 1998 1999 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
0 Afghanistan NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN 1.05 NaN 1.15 NaN 1.28 NaN 1.07 NaN 0.7
1 Angola NaN NaN NaN NaN NaN NaN NaN 0.38 NaN ... NaN 0.53 NaN 0.65 NaN 0.63 NaN 0.76 NaN 0.97
2 Albania NaN NaN NaN NaN NaN NaN NaN 0.86 NaN ... NaN 1.36 NaN 1.46 NaN 1.81 NaN 1.76 NaN 1.36
3 Andorra NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN 1.24 NaN 1.49 NaN 1.67 NaN 1.51 NaN NaN
4 UAE NaN NaN NaN NaN NaN NaN NaN 0.23 NaN ... NaN 0.45 NaN 0.47 NaN 0.47 NaN 0.47 NaN 0.49

5 rows × 27 columns

We account for several label discrepancies between the datasets.

# Row label in the gas-price table -> replacement country name. The rows are
# addressed by index label, so this relies on the row order of the CSV as
# loaded above.
country_label_fixes = {
    35: "Dem. Rep. Congo",
    172: "United States of America",
    28: "Central African Rep.",
    58: "United Kingdom",
    152: "S. Sudan",
    85: "Kyrgyzstan",
    19: "Bosnia and Herz.",
    33: "Côte d'Ivoire",
    4: "United Arab Emirates",
    46: "Dominican Rep.",
    108: "Macedonia",
}
for row_label, fixed_name in country_label_fixes.items():
    gas_gap_data.at[row_label, 'country'] = fixed_name
# World country outlines (TopoJSON), drawn as a grey base map with black
# borders in a Mercator projection.
countries = alt.topo_feature(
    'https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json',
    feature='countries',
)
country_map = (
    alt.Chart(countries)
    .mark_geoshape(fill='#aaaaaa', stroke='#000000')
    .project('mercator')
)

# Plain base map.
country_map.properties(width=600, height=400)

# Choropleth: attach each country's 2012 price to its map feature by matching
# the feature's name against the `country` column, then colour by that price.
prices_2012 = alt.LookupData(gas_gap_data, 'country', ['2012'])
(
    country_map
    .transform_lookup(lookup='properties.name', from_=prices_2012)
    .encode(
        fill="2012:Q",
        tooltip=["properties.name:O", "2012:Q"],
    )
    .properties(width=600, height=400, title="Gas rate: US $ per liter")
)

Part 2

# Survey data used in Parts 2 and 3, loaded from the course site.
democracy_url = (
    "https://calvin-data304.netlify.app/data/wvs.csv"
)
democracy_data = pd.read_csv(filepath_or_buffer=democracy_url)

Compute the total number of respondents for each country.

# Count the survey rows for each country, producing a two-column frame:
# `country` and `total`.
#
# The counts Series is named explicitly before it is turned into a frame.
# The default name of a value_counts() result depends on the pandas version
# (unnamed, hence column 0, before 2.0; "count" from 2.0 on), so renaming
# column 0 afterwards silently does nothing on newer pandas and leaves the
# chart without a `total` column.
respondants_per_country = (
    democracy_data.value_counts("country")
    .rename("total")
    .reset_index()
)
# Bar chart of the per-country totals, with countries ordered from the
# largest total to the smallest.
base = (
    alt.Chart(respondants_per_country)
    .encode(
        x=alt.X('country:O', sort="-y"),
        y=alt.Y('total:Q'),
    )
    .properties(width=300, height=300, title="Number of Respondants")
)
base.mark_bar()

Part 3

def _age_group_boxplots(data, group_field, panel_height):
    """Return boxplots of `age` per category of `group_field`, faceted by country.

    Each facet panel shows one horizontal boxplot per category of
    `group_field`, with whiskers spanning the full min-max range, coloured by
    that same category. Panels are laid out three per row.

    Parameters:
        data: table containing `age`, `country` and `group_field` columns.
        group_field: name of the categorical age-group column (for example
            "age3" or "age6").
        panel_height: height in pixels of each facet panel.
    """
    return (
        alt.Chart(data)
        .mark_boxplot(extent="min-max")
        .encode(
            alt.X(field="age", type="quantitative", title="Age in Years"),
            alt.Y(field=group_field, type="nominal"),
            alt.Color(f"{group_field}:N"),
        )
        .properties(width=300, height=panel_height)
        .facet(facet="country:O", columns=3)
    )


# Age distribution within each `age3` group, per country.
age3_facet = _age_group_boxplots(democracy_data, "age3", 75)
age3_facet

# Same view for the `age6` grouping; the panels are taller than the `age3`
# ones (150 px rather than 75 px).
age6_facet = _age_group_boxplots(democracy_data, "age6", 150)
age6_facet